# compute distances to closest new apartment buildings
# -------------------------------------------------------------------- #
# prelims ####
# -------------------------------------------------------------------- #
# clear environment
rm(list = ls())

# import relevant libraries
library(haven)
library(rgdal)
library(raster)
library(readxl) 
library(classInt)
library(Hmisc) 
library(GISTools) 
library(tidyverse)
library(units)
library(sf) 
library(plm)
library(DataCombine) 
library(rapportools) 

# directories
dir <- paste0("PATH_TO_MAIN_DIRECTORY_HERE")
dta_path <- paste0(dir, "data/int/08_900.dta")
gis_path <- paste0(dir,"orig/catastro/08_900_UH_2020-04-24_SHF/PARCELA/")

# -------------------------------------------------------------------- #
# prepare data ####
# -------------------------------------------------------------------- #

# catastro numerical data
NumData <- read_dta(dta_path) %>% 
  subset(House == 1 |Apartment == 1 ) %>% 
  select(c("PlotCode", "PropOrder2", "PropSqm", "YearConst")) %>% 
  subset(PropSqm > 10)  %>%  
  distinct(PlotCode, .keep_all = TRUE) %>% 
  subset(YearConst >= 1990)

# catastro shape
PlotShp <- st_read(stringsAsFactors = FALSE,
                   dsn = path.expand(sprintf("%s", gis_path, "PARCELA"))
                   , query="SELECT REFCAT FROM \"PARCELA\" WHERE FECHABAJA >= 30000000 AND PARCELA != '09000'") %>%
  st_transform('+proj=utm +zone=31 +ellps=GRS80 +units=m +no_defs')

# census tract numerical info
CATCensus <- read_dta(paste0(dir, "data/int/catastro_census_neigh_08_900.dta")) %>%
  subset(year == 2016)

# census tracts shape
RefYear <- 2016
censusfile <- paste0("bseccenv10sh1f1_", RefYear,"0101_0")
cen_path <- paste0(dir,"orig/idescat/Seccions Censals 2002-2016/", censusfile)
varselect <- c("MUNICIPI","DISTRICTE","SECCIO","geometry")

# census spatial data
CensShp2016 <- st_read(dsn = path.expand(sprintf("%s", cen_path, censusfile)))  %>%
  select(varselect) %>%
  st_transform('+proj=utm +zone=31 +ellps=GRS80 +units=m +no_defs')
CensShp2016$cent <- st_centroid(CensShp2016$geometry) # get centroid
# rename tract variable
names(CensShp2016)[names(CensShp2016)=="SECCIO"] <- "CSEC" %>%
  as.character()
# add municipality and province identifier
names(CensShp2016)[names(CensShp2016)=="DISTRICTE"] <- "CDIS"
CensShp2016$CSEC <-  as.character(CensShp2016$CSEC)
CensShp2016$CDIS <-  as.character(CensShp2016$CDIS)
CensShp2016$CPRO <- substr(CensShp2016$MUNICIPI,start = 1,stop = 2)
CensShp2016$CMUN <- substr(CensShp2016$MUNICIPI,start = 3,stop = 5)
CensShp2016$CUSEC <- paste0(CensShp2016$CPRO,CensShp2016$CMUN,CensShp2016$CDIS,CensShp2016$CSEC)
# only BCN
CensShp2016_08_900 <- CensShp2016 %>% subset(CPRO =="08" & CMUN == "019")

# -------------------------------------------------------------------- #
# new apartments each year ####
# -------------------------------------------------------------------- #
# properties that are in NumData
NewApts <- PlotShp %>% subset(REFCAT %in% unique(NumData$PlotCode))

# year of construction
NewApts <- merge(x=NewApts, y=NumData[c("PlotCode","YearConst")], 
                        by.x = "REFCAT", by.y = "PlotCode", all.x = FALSE, all.y = FALSE) 

# census tract of new plot
NewApts <- merge(x = as.data.frame(NewApts), y = CATCensus,
                 by.x = c("REFCAT"), by.y = c("PlotCode"), all.x = TRUE, all.y = FALSE) %>%
  subset(select = -c(year)) 

# centroids
NewApts$cent <- st_centroid(NewApts$geometry,TRUE)

# store
save(NewApts, file = paste0(dir, "data/int/NewApts_08_900.RData"))
  
# -------------------------------------------------------------------- #
# distances from individuals in the survey ####
# -------------------------------------------------------------------- #

# addresses of individuals in survey
Addresses <- read_dta(paste0(dir, "data/int/address_match.dta")) %>% 
  select(c(PlotCode)) %>% 
  distinct(PlotCode, .keep_all = TRUE)

# add cartography
AddShp <- merge(x = Addresses, y = PlotShp, 
                by.x = c("PlotCode"), by.y = c("REFCAT"),
                all.y = FALSE)

# transform sf object
AddShp <- AddShp %>% st_sf() %>% st_transform('+proj=utm +zone=31 +ellps=GRS80 +units=m +no_defs')

# centroid
AddShp$cent <- st_centroid(AddShp$geometry,TRUE)

# transform sf object
NewApts <- NewApts %>% st_sf() %>% st_transform('+proj=utm +zone=31 +ellps=GRS80 +units=m +no_defs')

# distances to closest new buildings in year
nrows <- nrow(AddShp)

for(year in 2019:2010){
  
  # new apartments in reference year
  NewAptsYr <- subset(NewApts, YearConst == year) %>%
    select(c(REFCAT, cent))
  
  # vars to fill
  AddShp$Dist1 <- 0
  AddShp$Dist2 <- 0
  AddShp$Dist3 <- 0
  AddShp$Dist4 <- 0
  AddShp$Dist5 <- 0
  AddShp$PlotDist1 <- 0
  AddShp$PlotDist2 <- 0
  AddShp$PlotDist3 <- 0
  AddShp$PlotDist4 <- 0
  AddShp$PlotDist5 <- 0
  
  # claculate distances
  for(i in 1:nrows){
    
    # calculate all distances between tract of reference and new apartments
    x <- st_distance(AddShp[[3]][i], NewAptsYr[[2]]) %>% as.numeric()  %>% round(digits = 2)
    
    # order by distance
    ord_dist <- sort(x, decreasing = FALSE)
  
    # get the positions
    pos <- match(x, ord_dist)
    p1 <- match(1,unlist(pos))
    p2 <- match(2,unlist(pos))
    p3 <- match(3,unlist(pos))
    p4 <- match(4,unlist(pos))
    p5 <- match(5,unlist(pos))
    
    # distances
    AddShp$Dist1[i] <- ord_dist[1]
    AddShp$Dist2[i] <- ord_dist[2]
    AddShp$Dist3[i] <- ord_dist[3]
    AddShp$Dist4[i] <- ord_dist[4]
    AddShp$Dist5[i] <- ord_dist[5]
    # parcel codes
    AddShp$PlotDist1[i] <- NewAptsYr[[1]][p1]
    AddShp$PlotDist2[i] <- NewAptsYr[[1]][p2]
    AddShp$PlotDist3[i] <- NewAptsYr[[1]][p3]
    AddShp$PlotDist4[i] <- NewAptsYr[[1]][p4]
    AddShp$PlotDist5[i] <- NewAptsYr[[1]][p5]
    
  }
  
  # rename
  Dist1NewName <- paste("Dist1_", year, sep = "")
  Dist2NewName <- paste("Dist2_", year, sep = "")
  Dist3NewName <- paste("Dist3_", year, sep = "")
  Dist4NewName <- paste("Dist4_", year, sep = "")
  Dist5NewName <- paste("Dist5_", year, sep = "")
  
  PlotDist1NewName <- paste("PlotDist1_", year, sep = "")
  PlotDist2NewName <- paste("PlotDist2_", year, sep = "")
  PlotDist3NewName <- paste("PlotDist3_", year, sep = "")
  PlotDist4NewName <- paste("PlotDist4_", year, sep = "")
  PlotDist5NewName <- paste("PlotDist5_", year, sep = "")
  
  names(AddShp)[names(AddShp)=="Dist1"] <- Dist1NewName
  names(AddShp)[names(AddShp)=="Dist2"] <- Dist2NewName
  names(AddShp)[names(AddShp)=="Dist3"] <- Dist3NewName
  names(AddShp)[names(AddShp)=="Dist4"] <- Dist4NewName
  names(AddShp)[names(AddShp)=="Dist5"] <- Dist5NewName
  
  names(AddShp)[names(AddShp)=="PlotDist1"] <- PlotDist1NewName
  names(AddShp)[names(AddShp)=="PlotDist2"] <- PlotDist2NewName
  names(AddShp)[names(AddShp)=="PlotDist3"] <- PlotDist3NewName
  names(AddShp)[names(AddShp)=="PlotDist4"] <- PlotDist4NewName
  names(AddShp)[names(AddShp)=="PlotDist5"] <- PlotDist5NewName
  
}

# remove geometry
AddShp$geometry <- NULL
AddShp <- AddShp %>% select(-c(cent))
AddShp <- AddShp %>% distinct(PlotCode, .keep_all = TRUE) 

# store
save(AddShp, file = paste0(dir, "data/int/Survey_Dist_NewApt.RData"))
write_dta(data = AddShp, paste0(dir, "data/int/Survey_Dist_NewApt.dta"))

# -------------------------------------------------------------------- #
# distances from census tracts ####
# -------------------------------------------------------------------- #
lastyear <- 2019
firstyear <- 2000

## part 1: get distances around new plots ####
# -------------------------------------------------------------------- #
# note: very slow
# load new apartments data
load(paste0(dir,"data/int/NewApts_08_900.RData"))

# draw a buffer around the centroid of new apartments
NewApts$Buff50 <- st_buffer(NewApts$cent, dist = 50)
NewApts$Buff100 <- st_buffer(NewApts$cent, dist = 100)
NewApts$Buff200 <- st_buffer(NewApts$cent, dist = 200)
NewApts$Buff350 <- st_buffer(NewApts$cent, dist = 350)
NewApts$Buff500 <- st_buffer(NewApts$cent, dist = 500)
NewApts$Buff750 <- st_buffer(NewApts$cent, dist = 750)
NewApts$Buff1000 <- st_buffer(NewApts$cent, dist = 1000)

# initialize vector to store results
NewApts_TC_Cont <- NULL

# function to calculate distances
CalcDist <- function(el){
  st_distance(x = newapt_cent, y = TRACTS_CENT[[1]][el]) %>% as.numeric() %>% round(digits = 2)
}

for(year in lastyear:firstyear){ 
  
  # new apartments in the year
  NewAptsYEAR <- NewApts %>% subset(YearConst == year) %>% 
    subset(select = c(REFCAT, YearConst, CUSEC, cent, Buff1000)) 
  # get list of rows in from the relevant year
  YearRows <- nrow(NewAptsYEAR)
  
  # list of tracts within 1000m buffer
  TRACTS1000 <- st_intersects(x = NewAptsYEAR$Buff1000, y = CensShp2016_08_900$cent)
  
  # vectors to fill
  NewAptsYEAR$TractList <- NA
  NewAptsYEAR$DistList <- NA
  
  # fill rows within a year
  for(r in 1:YearRows){
    
    # centroid of new apartment
    newapt_cent <- NewAptsYEAR$cent[r]
    # centroid of intersected census tracts
    TRACTS_CENT <- CensShp2016_08_900[TRACTS1000[[r]], c("cent")] %>% 
      st_set_geometry(NULL) %>% as.list(all.names=FALSE)
    # CUSEC of intersected tracts
    TRACTS_CUSEC <-  CensShp2016_08_900[TRACTS1000[[r]], c("CUSEC")] %>% 
      st_set_geometry(NULL) %>% as.list(all.names=FALSE)
    
    # intersected tracts
    CLOSE_TRACTS <- TRACTS_CUSEC[[1]] %>% list()
    # calculate distance to every element in the list
    DISTANCES <- lapply(TRACTS_CENT[[1]], CalcDist) %>% list()
    
    # select max of census tracts considered
    NCoseTracts <- length(CLOSE_TRACTS[[1]]) # number of close tracts
    TractList <- vector(mode="logical", length = NCoseTracts)
    DistList <- vector(mode="logical", length = NCoseTracts)
    
    # get closest tracts
    if(NCoseTracts>0){ 
      
      for(i in 1:NCoseTracts){
    
      # select closest tracts
      element <- sort(unlist(DISTANCES))[i]
      pos <- match(element,unlist(DISTANCES)) 
      TractList[i] <- unlist(CLOSE_TRACTS[[1]])[pos]
      DistList[i] <- unlist(DISTANCES[[1]])[pos]
    
     } 
    
    # add to NewApts
    NewAptsYEAR$TractList[r] <- TractList %>% list()
    NewAptsYEAR$DistList[r] <- DistList %>% list()
    
    } 
  } 
  
  # put together
  NewApts_TC_Cont <- rbind(NewApts_TC_Cont, NewAptsYEAR)
  
}

# store
save(NewApts_TC_Cont, 
     file = paste0(dir, "data/int/NewApts_TC_Cont_08_900.RData"))

## part 2: link events to census tracts ####
# -------------------------------------------------------------------- #

# data with census tracts
Analysis_Cont <- CensShp2016_08_900 %>% 
  st_set_geometry(NULL) %>% 
  subset(select = -c(MUNICIPI,cent)) 

# initialize matrix to store results
CONT <- NULL

# number of rows
NROWS <- nrow(Analysis_Cont)

for(year in 2000:lastyear){
  
  NEWAPTSYEAR <- NewApts_TC_Cont[NewApts_TC_Cont$YearConst == year, ] 
  
  # identify year
  Analysis_Cont$Year <- year
  
  # initialize vectors to fill
  Analysis_Cont$TREAT1000 <- 99
  Analysis_Cont$TractList <- NA
  Analysis_Cont$DistList <- NA
  Analysis_Cont$EVENT <- 99
  
  for(r in 1:NROWS){ 
    
    # to fill
    DISTLIST <- NULL
    EVLIST <- NULL
    
    # identify whether there is and event or not
    Analysis_Cont$EVENT[r] <- as.numeric(Analysis_Cont$CUSEC[r] %in% unlist(NEWAPTSYEAR$CUSEC))
    # identify treatment status
    Analysis_Cont$TREAT1000[r] <- as.numeric(Analysis_Cont$CUSEC[r] %in% unlist(NEWAPTSYEAR$TractList))
    
    # if tract within 1000m from an event tract
    if(Analysis_Cont$TREAT1000[r]==1){
      
      rowmatch <- which(grepl(Analysis_Cont$CUSEC[r], NEWAPTSYEAR$TractList))
 
      for(rowel in 1:length(rowmatch)){ 
        
          # row element in NEWAPTS
          rowmatchel <- rowmatch[rowel]
          # position in list in row
          pos <- match(Analysis_Cont$CUSEC[r], unlist(NEWAPTSYEAR$TractList[rowmatchel]))
          # distance
          DIST1000 <- NEWAPTSYEAR$DistList[[rowmatchel]][pos] 
          # event
          EV1000 <- NEWAPTSYEAR$CUSEC[[rowmatchel]]
          DISTLIST <- rbind(DISTLIST, DIST1000)
          EVLIST <- rbind(EVLIST, EV1000)
          
      }
      
      # order tracts by distance - vectors to fill
      TractList <- NA
      DistList <- NA
      
      for(i in 1:length(rowmatch)){
        
        # select closest tracts
        element <- sort(unlist(DISTLIST))[i] 
        pos <- match(element,unlist(DISTLIST))
        TractList[i] <- unlist(EVLIST[[pos]])
        DistList[i] <- unlist(DISTLIST[[pos]])
        
      } 
      
      # fill
      Analysis_Cont$TractList[r] <- TractList %>% list()
      Analysis_Cont$DistList[r] <- DistList %>% list()

    }
    
    # not treated - set to na
    if(Analysis_Cont$TREAT1000[r]==0){
      
      Analysis_Cont$TractList[r] <- NA
      Analysis_Cont$DistList[r] <- NA
      
    }
    
    # event tract
    if(Analysis_Cont$EVENT[r]==1){
      
      Analysis_Cont$TREAT1000[r] <- NA
      Analysis_Cont$DistList[r] <- NA
      Analysis_Cont$TractList[r] <- NA

    }
  } 
  
  # put together
  CONT <- rbind(CONT, Analysis_Cont)
  
} 

# store
save(CONT, file = paste0(dir, "data/int/CSEC_Dist_NewApt.RData"))

# for stata
CONTSTATA <- CONT 
NROWS <- nrow(CONT)

# initialize vectors
CONTSTATA$EvTract1 <- NA
CONTSTATA$Dist1 <- NA
CONTSTATA$EvTract2 <- NA
CONTSTATA$Dist2 <- NA
CONTSTATA$EvTract3 <- NA
CONTSTATA$Dist3 <- NA
CONTSTATA$EvTract4 <- NA
CONTSTATA$Dist4 <- NA
CONTSTATA$EvTract5 <- NA
CONTSTATA$Dist5 <- NA
CONTSTATA$EvTract6 <- NA
CONTSTATA$Dist6 <- NA
CONTSTATA$EvTract7 <- NA
CONTSTATA$Dist7 <- NA
CONTSTATA$EvTract8 <- NA
CONTSTATA$Dist8 <- NA
CONTSTATA$EvTract9 <- NA
CONTSTATA$Dist9 <- NA
CONTSTATA$EvTract10 <- NA
CONTSTATA$Dist10 <- NA
CONTSTATA$EvTract11 <- NA
CONTSTATA$Dist11 <- NA
CONTSTATA$EvTract12 <- NA
CONTSTATA$Dist12 <- NA

for(r in 1:NROWS){

  CONTSTATA$EvTract1[r] <- CONTSTATA$TractList[[r]][1]
  CONTSTATA$Dist1[r] <- CONTSTATA$DistList[[r]][1]

  CONTSTATA$EvTract2[r] <- CONTSTATA$TractList[[r]][2]
  CONTSTATA$Dist2[r] <- CONTSTATA$DistList[[r]][2]
  
  CONTSTATA$EvTract3[r] <- CONTSTATA$TractList[[r]][3]
  CONTSTATA$Dist3[r] <- CONTSTATA$DistList[[r]][3]
  
  CONTSTATA$EvTract4[r] <- CONTSTATA$TractList[[r]][4]
  CONTSTATA$Dist4[r] <- CONTSTATA$DistList[[r]][4]
  
  CONTSTATA$EvTract5[r] <- CONTSTATA$TractList[[r]][5]
  CONTSTATA$Dist5[r] <- CONTSTATA$DistList[[r]][5]
  
  CONTSTATA$EvTract6[r] <- CONTSTATA$TractList[[r]][6]
  CONTSTATA$Dist6[r] <- CONTSTATA$DistList[[r]][6]
  
  CONTSTATA$EvTract7[r] <- CONTSTATA$TractList[[r]][7]
  CONTSTATA$Dist7[r] <- CONTSTATA$DistList[[r]][7]
  
  CONTSTATA$EvTract8[r] <- CONTSTATA$TractList[[r]][8]
  CONTSTATA$Dist8[r] <- CONTSTATA$DistList[[r]][8]
  
  CONTSTATA$EvTract9[r] <- CONTSTATA$TractList[[r]][9]
  CONTSTATA$Dist9[r] <- CONTSTATA$DistList[[r]][9]
  
  CONTSTATA$EvTract10[r] <- CONTSTATA$TractList[[r]][10]
  CONTSTATA$Dist10[r] <- CONTSTATA$DistList[[r]][10]
  
  CONTSTATA$EvTract11[r] <- CONTSTATA$TractList[[r]][11]
  CONTSTATA$Dist11[r] <- CONTSTATA$DistList[[r]][11]
  
  CONTSTATA$EvTract12[r] <- CONTSTATA$TractList[[r]][12]
  CONTSTATA$Dist12[r] <- CONTSTATA$DistList[[r]][12]

}

# remove lists
CONTSTATA <- CONTSTATA %>% subset(select = -c(TractList,DistList))

# store
write_dta(data = CONTSTATA, paste0(dir, "data/int/CSEC_Dist_NewApt.dta"))

# -------------------------------------------------------------------- #
# closing ####
# -------------------------------------------------------------------- #

rm(list = ls())
